#include "our_gl.h"
#include "mathengine.h"

#include <iostream>

using namespace mathengine;

// Transformation matrices shared through this translation unit. Each one is
// rebuilt from scratch by the setup function of the matching name below.
Matrix44f ModelView;  // written by lookat()
Matrix44f Viewport;   // written by viewport(); read by the clip-space triangle() overload
Matrix44f Projection; // written by projection()

// Depth range used by viewport(): z is scaled and offset by depth / 2.
const float depth = 255.f; // alternative value kept from the original: 2000.f

// Builds the global Viewport matrix:
//
//   | w/2   0      0       x + w/2 |
//   |  0   h/2     0       y + h/2 |
//   |  0    0   depth/2    depth/2 |
//   |  0    0      0          1    |
//
// For input coordinates in [-1, 1] this maps x to [x, x + w], y to [y, y + h]
// and z to [0, depth].
//
// x, y - offset of the target rectangle
// w, h - width and height of the target rectangle
//
// https://www.cnblogs.com/fortunely/p/18106741
void viewport(int x, int y, int w, int h) {
    const float halfW = w / 2.f;
    const float halfH = h / 2.f;
    const float halfD = depth / 2.f;

    Viewport = Matrix44f::identity();

    // Diagonal: scale each axis by half of the target extent.
    Viewport.data_[0][0] = halfW;
    Viewport.data_[1][1] = halfH;
    Viewport.data_[2][2] = halfD;

    // Last column: shift the scaled range so that it starts at the offset.
    Viewport.data_[0][3] = x + halfW;
    Viewport.data_[1][3] = y + halfH;
    Viewport.data_[2][3] = halfD;
}

// Projection=
// (1
//    1
//       1
//       r  1), r = -1/c, c is camera's z coordinate
// )
// https://www.cnblogs.com/fortunely/p/18819642
void projection(float coeff) {
    Projection = Matrix44f::identity();
    Projection.data_[3][2] = coeff;
}

// Builds the global ModelView matrix, the transformation from world space to
// eye/camera space.
//
// eye    - camera position
// center - point the camera looks at; it becomes the origin of the new frame
// up     - approximate up direction, used to derive the camera's x axis
//
// The result is Minv * Tr, where Tr translates by -center and the first three
// rows of Minv hold the camera's x, y and z axes.
//
// https://www.cnblogs.com/fortunely/p/18709389
void lookat(Vec3f eye, Vec3f center, Vec3f up) {
    // Orthonormal camera basis; the z axis points from `center` towards `eye`.
    Vec3f axisZ = (eye - center).normalize();
    Vec3f axisX = crossProduct(up, axisZ).normalize();
    Vec3f axisY = crossProduct(axisZ, axisX).normalize();

    // Change of basis: one camera axis per row.
    Matrix44f basis = Matrix44f::identity();
    for (int col = 0; col < 3; ++col) {
        basis.setValue(0, col, axisX[col]);
        basis.setValue(1, col, axisY[col]);
        basis.setValue(2, col, axisZ[col]);
    }

    // Translation that moves `center` to the origin.
    Matrix44f shift = Matrix44f::identity();
    for (int row = 0; row < 3; ++row) {
        shift.setValue(row, 3, -center[row]);
    }

    ModelView = basis * shift;
}

// Barycentric coordinates of P with respect to triangle ABC, computed with a
// single cross product:
//
//   u = (ux, uy, uz) = (Bx-Ax, Cx-Ax, Ax-Px) x (By-Ay, Cy-Ay, Ay-Py)
//   P = (1 - ux/uz - uy/uz) A + (ux/uz) B + (uy/uz) C,   uz != 0
//
// Mind the order: ux belongs to B and uy belongs to C.
//
// Returns (alpha, beta, gamma), the weights of A, B and C. When |uz| is not
// above 1e-2 the triangle is treated as degenerate and (-1, 1, 1) is returned;
// its negative first component makes callers reject the pixel.
//
// https://www.cnblogs.com/fortunely/p/18180929
Vec3f barycentric(Vec2f A, Vec2f B, Vec2f C, Vec2f P) {
    Vec3f alongX{ B.x() - A.x(), C.x() - A.x(), A.x() - P.x() };
    Vec3f alongY{ B.y() - A.y(), C.y() - A.y(), A.y() - P.y() };
    Vec3f n = crossProduct(alongX, alongY);

    // Degenerate triangle: bail out before dividing by (almost) zero.
    if (!(std::abs(n[2]) > 1e-2)) {
        return Vec3f{-1, 1, 1};
    }

    return Vec3f{1.f - (n.x() + n.y()) / n.z(), n.x() / n.z(), n.y() / n.z()};
}

// Rasterizes one triangle into `image`, depth-tested against an 8-bit z-buffer
// image.
//
// pts     - array of three homogeneous vertices (x, y, z, w); x and y are
//           divided by w here, so the vertices are presumably already in
//           screen space (viewport applied) -- confirm against callers
// shader  - fragment(bc, color) receives the screen-space barycentric
//           coordinates of the pixel; a true return value discards the fragment
// image   - render target; its width/height bound the rasterized area
// zbuffer - depth image, channel 0 holds the depth in [0, 255]; a fragment is
//           drawn only when its depth is strictly greater than the stored one
//           (assumed to have the same dimensions as `image`)
void triangle(Vec4f *pts, IShader &shader, TGAImage &image, TGAImage &zbuffer) {
    // The perspective divide does not depend on the pixel: do it once per vertex.
    Vec2f a = proj<float, 4, 2>(pts[0] / pts[0].w());
    Vec2f b = proj<float, 4, 2>(pts[1] / pts[1].w());
    Vec2f c = proj<float, 4, 2>(pts[2] / pts[2].w());

    // Bounding box of the projected triangle, clamped to the image so that
    // no off-screen pixel is ever visited.
    Vec2f bboxmin{ std::numeric_limits<float>::max(),    std::numeric_limits<float>::max() };
    Vec2f bboxmax{ std::numeric_limits<float>::lowest(), std::numeric_limits<float>::lowest() };
    Vec2f clamp{ static_cast<float>(image.width() - 1), static_cast<float>(image.height() - 1) };
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 2; j++) { // j: 0 = x, 1 = y
            const float v = pts[i][j] / pts[i][3];
            bboxmin[j] = std::max(0.f,      std::min(bboxmin[j], v));
            bboxmax[j] = std::min(clamp[j], std::max(bboxmax[j], v));
        }
    }
    if (bboxmin.x() > bboxmax.x() || bboxmin.y() > bboxmax.y()) {
        return; // entirely off-screen (or no usable vertex)
    }

    const int max_depth = 255; // largest value of one z-buffer channel
    Vec2i P;
    TGAColor color;
    for (P[0] = bboxmin.x(); P[0] <= bboxmax.x(); P[0]++) {
        for (P[1] = bboxmin.y(); P[1] <= bboxmax.y(); P[1]++) {
            Vec3f bc = barycentric(a, b, c, P);
            // Reject pixels outside the triangle before touching z / w, which
            // is only meaningful (and safely convertible to int) inside it.
            if (bc.x() < 0 || bc.y() < 0 || bc.z() < 0) {
                continue;
            }
            // Pz = Az * alpha + Bz * beta + Cz * gamma
            float z = pts[0].z() * bc.x() + pts[1].z() * bc.y() + pts[2].z() * bc.z();
            // Pw = Aw * alpha + Bw * beta + Cw * gamma
            float w = pts[0].w() * bc.x() + pts[1].w() * bc.y() + pts[2].w() * bc.z();
            // round(Pz / Pw), clamped to [0, max_depth]
            int frag_depth = std::max(0, std::min(max_depth, static_cast<int>(z / w + .5)));
            if (zbuffer.get(P.x(), P.y())[0] >= frag_depth) {
                continue;
            }
            bool discard = shader.fragment(bc, color);
            if (!discard) {
                zbuffer.set(P.x(), P.y(), TGAColor(frag_depth)); // store frag depth
                image.set(P.x(), P.y(), color);
            }
        }
    }
}

// Rasterizes one triangle given in clip coordinates, with perspective-correct
// interpolation and a floating-point z-buffer.
//
// clipc   - 4x3 matrix; each column is one vertex in homogeneous clip coordinates
// shader  - fragment(bc, color) receives the perspective-corrected barycentric
//           coordinates of the pixel; a true return value discards the fragment
// image   - render target; its width/height bound the rasterized area and give
//           the row stride of `zbuffer`
// zbuffer - depth buffer indexed as x + y * image.width(); a fragment is kept
//           when its depth is not smaller than the stored one
//           (assumes zbuffer.size() >= width * height -- not checked here)
void triangle(Mat<float, 4, 3> &clipc, IShader &shader, TGAImage &image, std::vector<float> &zbuffer) {
    // Homogeneous screen-space vertices, transposed so that row i is vertex i.
    Mat<float, 3, 4> pts = (Viewport * clipc).transposed();
    // 2D screen positions after the perspective divide: row i = (x / w, y / w).
    Mat<float, 3, 2> pts2;
    for (int i = 0; i < 3; i++) {
        auto v = pts.getRow(i) / pts.getRow(i).w();
        pts2.setRow(i, proj<float, 4, 2>(v));
    }

    // Bounding box clamped to the image. bboxmax must start at lowest():
    // numeric_limits<float>::min() is the smallest POSITIVE float.
    Vec2f bboxmin{ std::numeric_limits<float>::max(),    std::numeric_limits<float>::max() };
    Vec2f bboxmax{ std::numeric_limits<float>::lowest(), std::numeric_limits<float>::lowest() };
    Vec2f clamp{ static_cast<float>(image.width() - 1), static_cast<float>(image.height() - 1) };
    for (int i = 0; i < 3; i++) {     // vertex 0-2
        for (int j = 0; j < 2; j++) { // j: 0 = x, 1 = y
            bboxmin[j] = std::max(0.f,      std::min(bboxmin[j], pts2.getValue(i, j)));
            bboxmax[j] = std::min(clamp[j], std::max(bboxmax[j], pts2.getValue(i, j)));
        }
    }
    if (bboxmin.x() > bboxmax.x() || bboxmin.y() > bboxmax.y()) {
        return; // entirely off-screen (or no usable vertex)
    }

    // Per-vertex w is loop-invariant; a zero w cannot be perspective-divided.
    const float w0 = pts.getValue(0, 3);
    const float w1 = pts.getValue(1, 3);
    const float w2 = pts.getValue(2, 3);
    assert(w0 != 0.f); assert(w1 != 0.f); assert(w2 != 0.f);

    Vec2f a = pts2.getRow(0);
    Vec2f b = pts2.getRow(1);
    Vec2f c = pts2.getRow(2);

    Vec2i P;
    TGAColor color;
    for (P[0] = bboxmin.x(); P[0] <= bboxmax.x(); P[0]++) {
        for (P[1] = bboxmin.y(); P[1] <= bboxmax.y(); P[1]++) {
            // Screen-space barycentric coordinates (alpha, beta, gamma) of P.
            Vec3f bc_screen = barycentric(a, b, c, P);
            // Reject outside pixels first, so the checks below only ever see
            // pixels that can actually be drawn.
            if (bc_screen.x() < 0 || bc_screen.y() < 0 || bc_screen.z() < 0) {
                continue;
            }

            // Perspective correction: (alpha / w0, beta / w1, gamma / w2),
            // renormalized so that the three weights sum to 1 again.
            Vec3f bc_clip{ bc_screen.x() / w0, bc_screen.y() / w1, bc_screen.z() / w2 };
            float t = bc_clip.x() + bc_clip.y() + bc_clip.z();
            assert(t != 0.f);
            bc_clip /= t;

            // Interpolated clip-space z of the fragment.
            float frag_depth = clipc.getRow(2) * bc_clip;

            const auto idx = P.x() + P.y() * image.width();
            if (zbuffer[idx] > frag_depth) {
                continue;
            }

            bool discard = shader.fragment(bc_clip, color);
            if (!discard) {
                zbuffer[idx] = frag_depth;
                image.set(P.x(), P.y(), color);
            }
        }
    }
}

// Rasterizes one triangle into `image`, depth-tested against a floating-point
// z-buffer.
//
// pts     - array of three homogeneous vertices (x, y, z, w); x and y are
//           divided by w here, so the vertices are presumably already in
//           screen space (viewport applied) -- confirm against callers
// shader  - fragment(bc, color) receives the screen-space barycentric
//           coordinates of the pixel; a true return value discards the fragment
// image   - render target; its width/height bound the rasterized area and give
//           the row stride of `zbuffer`
// zbuffer - depth buffer indexed as x + y * image.width(); a fragment is kept
//           when its depth is not smaller than the stored one
//           (assumes zbuffer.size() >= width * height -- not checked here)
void triangle(Vec4f *pts, IShader &shader, TGAImage &image, std::vector<float> &zbuffer) {
    // The perspective divide does not depend on the pixel: do it once per vertex.
    Vec2f a = proj<float, 4, 2>(pts[0] / pts[0].w());
    Vec2f b = proj<float, 4, 2>(pts[1] / pts[1].w());
    Vec2f c = proj<float, 4, 2>(pts[2] / pts[2].w());

    // Bounding box of the projected triangle, clamped to the image. Without
    // the clamp a partially off-screen triangle would index `zbuffer` outside
    // of its storage.
    Vec2f bboxmin{ std::numeric_limits<float>::max(),    std::numeric_limits<float>::max() };
    Vec2f bboxmax{ std::numeric_limits<float>::lowest(), std::numeric_limits<float>::lowest() };
    Vec2f clamp{ static_cast<float>(image.width() - 1), static_cast<float>(image.height() - 1) };
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 2; j++) { // j: 0 = x, 1 = y
            const float v = pts[i][j] / pts[i].w();
            bboxmin[j] = std::max(0.f,      std::min(bboxmin[j], v));
            bboxmax[j] = std::min(clamp[j], std::max(bboxmax[j], v));
        }
    }
    if (bboxmin.x() > bboxmax.x() || bboxmin.y() > bboxmax.y()) {
        return; // entirely off-screen (or no usable vertex)
    }

    Vec2i P;
    TGAColor color;
    for (P[0] = bboxmin.x(); P[0] <= bboxmax.x(); P[0]++) {
        for (P[1] = bboxmin.y(); P[1] <= bboxmax.y(); P[1]++) {
            Vec3f bc = barycentric(a, b, c, P);
            // Pixels outside the triangle never reach the depth test.
            if (bc.x() < 0 || bc.y() < 0 || bc.z() < 0) {
                continue;
            }
            // Pz = Az * alpha + Bz * beta + Cz * gamma
            float z = pts[0].z() * bc.x() + pts[1].z() * bc.y() + pts[2].z() * bc.z();
            // Pw = Aw * alpha + Bw * beta + Cw * gamma
            float w = pts[0].w() * bc.x() + pts[1].w() * bc.y() + pts[2].w() * bc.z();
            float frag_depth = z / w;

            const auto idx = P.x() + P.y() * image.width();
            if (zbuffer[idx] > frag_depth) {
                continue;
            }
            bool discard = shader.fragment(bc, color);
            if (!discard) {
                zbuffer[idx] = frag_depth;
                image.set(P.x(), P.y(), color);
            }
        }
    }
}
